#!/usr/bin/env Rscript

## NOTE: the workspace is deliberately NOT cleared here.  The read.csv() call
## that would create DATA is commented out further down (the voter file is not
## provided), so DATA has to be supplied by whoever runs this script; clearing
## the workspace with rm(list = ls()) at this point would delete it again.

## Keep character columns as character when building data frames (this is
## already the default from R 4.0 on; kept for older versions).
options(stringsAsFactors = FALSE)

suppressMessages(library(plyr))
suppressMessages(library(classInt))
suppressMessages(library(stargazer))

## The voter record is not provided with this script, so the read below is
## left commented out.  DATA must exist before the following lines run.
## DATA <- read.csv(
##     "formattedByIndividual_Cali3AllYears_byPID_filtered_movers_selectvars2_withzipcodes_fixed.csv.gz"
##     )

## Keep only the rows whose mover4 flag equals 1.  Rows where the comparison
## is NA are dropped too (which() skips them), and original row names are kept.
DATA <- DATA[which(with(DATA, mover4 == 1)), , drop = FALSE]

## Per-wave keys id14, id12, id10: the locality code and the registrant id of
## the matching *_cali3<wave> columns, joined by a tab character.
DATA[paste0("id", c("14", "12", "10"))] <- lapply(
    c("14", "12", "10"),
    function(wave) {
        paste(
            DATA[[paste0("localitycode_cali3", wave)]],
            DATA[[paste0("registrantid_cali3", wave)]],
            sep = "\t"
        )
    }
)

## Sum of the three earlier move flags; mover4 is intentionally left out.
DATA[["addMovers"]] <- Reduce(`+`, DATA[c("mover1", "mover2", "mover3")])


## The voter record format changed (per the original note): the value "npp"
## is rewritten to "ds" in the three party columns so that both spellings
## end up as one category.  Missing values are left untouched.
DATA[c("party_cali310", "party_cali312", "party_cali314")] <- lapply(
    DATA[c("party_cali310", "party_cali312", "party_cali314")],
    function(party) replace(party, which(party == "npp"), "ds")
)


## Bin edges for move12to14.distance: classIntervals() is asked for 30 classes
## (its default style) and duplicate edges are collapsed with unique().
## The edges are computed on rows of DATA that satisfy all of:
##   * mover4 == 1 and sameres12to14 is NA
##   * locality code and registrant id agree between the cali312 and cali314
##     columns
##   * zip_cali312 differs from zip_cali314 and both convert to numbers
##   * party_cali312 and party_cali314 are each "dem" or "rep"
## Rows for which the combined condition is NA are excluded.
the.brks <- local({
    same_registrant <- with(
        DATA,
        localitycode_cali312 == localitycode_cali314
        & registrantid_cali312 == registrantid_cali314
    )
    two_party <- with(
        DATA,
        party_cali312 %in% c("dem", "rep") & party_cali314 %in% c("dem", "rep")
    )
    usable_zips <- with(DATA, zip_cali312 != zip_cali314) &
        !is.na(as.numeric(DATA$zip_cali312)) &
        !is.na(as.numeric(DATA$zip_cali314))
    keep <- with(DATA, mover4 == 1 & is.na(sameres12to14)) &
        same_registrant & usable_zips & two_party

    distances <- DATA[["move12to14.distance"]][which(keep)]
    unique(classIntervals(distances, n = 30)$brks)
})
print(the.brks)


## Estimation sample for the model that controls for earlier moves.
## A row of DATA is kept only when every condition below is TRUE (a row whose
## combined condition is NA is dropped):
##   * mover4 == 1
##   * at least one earlier move flag (mover1, mover2 or mover3) equals 1 and
##     both zip codes of that same period are non-empty strings
##   * sameres12to14 is NA (original note: removes spouses)
##   * locality code and registrant id agree between the cali312 and cali314
##     columns
##   * zip_cali312 differs from zip_cali314 and both convert to numbers
##   * party_cali312 and party_cali314 are each "dem" or "rep"
SUB <- local({
    moved_before <- with(
        DATA,
        (mover1 == 1 & zip_cali307 != "" & zip_cali309 != "")
        | (mover2 == 1 & zip_cali309 != "" & zip_cali310 != "")
        | (mover3 == 1 & zip_cali310 != "" & zip_cali312 != "")
    )
    same_registrant <- with(
        DATA,
        localitycode_cali312 == localitycode_cali314
        & registrantid_cali312 == registrantid_cali314
    )
    usable_zips <- with(DATA, zip_cali312 != zip_cali314) &
        !is.na(as.numeric(DATA$zip_cali312)) &
        !is.na(as.numeric(DATA$zip_cali314))
    two_party <- with(
        DATA,
        party_cali312 %in% c("dem", "rep") & party_cali314 %in% c("dem", "rep")
    )
    keep <- with(DATA, mover4 == 1 & is.na(sameres12to14)) &
        moved_before & same_registrant & usable_zips & two_party

    DATA[which(keep), , drop = FALSE]
})

## rm(DATA)

## For each of the three earlier-move distance columns:
##   1. replace missing distances by 0;
##   2. add a "<column>.scaled" companion holding log(distance + 1) after
##      centring and scaling with scale(), stored as a plain vector.
SUB <- local({
    out <- SUB
    for (dist_col in c("move07to09.distance",
                       "move09to10.distance",
                       "move10to12.distance")) {
        filled <- out[[dist_col]]
        filled[is.na(filled)] <- 0
        out[[dist_col]] <- filled
        out[[paste0(dist_col, ".scaled")]] <- as.vector(scale(log(filled + 1)))
    }
    out
})


## Linear model (lm) with a logical response: TRUE when party_cali312 equals
## party_cali314.
## Predictors:
##   * move12to14.distance binned by cut() on the.brks (left-closed bins)
##   * addMovers
##   * the three scaled earlier-move distances
## Fitted on the rows of SUB where both party columns are "dem"/"rep" and
## addMovers > 0.
## The code is left exactly as written: the formula text becomes the
## coefficient names, and mod14$call is stored and saved further down.
## NOTE(review): with right=FALSE and no include.lowest, a distance equal to
## the largest break gets an NA bin from cut(); such rows would then be
## dropped by lm()'s NA handling -- confirm this is intended.
mod14 <- lm(
    party_cali312==party_cali314 ~
    cut(move12to14.distance, the.brks,right=FALSE)
    + addMovers
    + move07to09.distance.scaled
    + move09to10.distance.scaled
    + move10to12.distance.scaled
   ,
    data = subset(
        SUB,
        party_cali312 %in% c("dem","rep")
        & party_cali314 %in% c("dem","rep")
        & addMovers > 0
        )
    )


## Pieces of the fit that are saved: coefficients, their standard errors,
## confidence intervals and the call that produced the model.
the.coefs <- coef(mod14)
the.ses <- coefficients(summary(mod14))[, "Std. Error"]
the.cis <- confint(mod14)
the.call <- mod14$call

## Rows lm() dropped because of missing values (NULL when nothing was dropped).
the.nas <- mod14$na.action

## Rows of SUB that were not dropped for missingness.  The names of
## mod14$na.action are row names, so they have to be matched against
## row.names(SUB): a mask built from row.names(DATA) has a different length
## and row order than SUB and would select the wrong rows.
## NOTE(review): rows removed by the `addMovers > 0` filter in the lm() data
## argument are not part of na.action and therefore stay in -- confirm.
kept.rows <- !(row.names(SUB) %in% names(the.nas))

## Median move12to14.distance within each distance bin.
the.medians <- with(
    SUB[kept.rows, , drop = FALSE],
    tapply(
        X = move12to14.distance,
        INDEX = cut(move12to14.distance, the.brks, right = FALSE),
        median
    )
)

## Mean of log(distance + 1) within each distance bin (log scale, not the
## raw mean).
the.means <- with(
    SUB[kept.rows, , drop = FALSE],
    tapply(
        X = move12to14.distance,
        INDEX = cut(move12to14.distance, the.brks, right = FALSE),
        function(x) mean(log(x + 1), na.rm = TRUE)
    )
)

## One prediction (with standard error) per distance bin.
## The model only sees distance through cut(), so any value inside a bin
## gives that bin's prediction.  The bin midpoint (lower edge + half the bin
## width) is used as the probe value; a fixed +0.01 offset would fall into
## the next bin whenever a bin is narrower than 0.01.
## The scaled earlier-move distances are held at 0 and addMovers at its mean
## over SUB.
## NOTE(review): mean(SUB$addMovers) is NA if any addMovers in SUB is NA.
the.preds <- predict(
    mod14,
    newdata = data.frame(
        move12to14.distance = the.brks[-length(the.brks)] + diff(the.brks) / 2,
        addMovers = mean(SUB$addMovers),
        move07to09.distance.scaled = 0,
        move09to10.distance.scaled = 0,
        move10to12.distance.scaled = 0
    ),
    se.fit = TRUE
)

## Write the extracted results for this specification to disk.
save(the.coefs, the.ses, the.preds, the.cis, the.call, the.medians, the.means, the.nas, file="moves_distance_12to14_simple_noagecontrol_controlpastmoves_pastmoversonly_check.RData")




## Estimation sample for the model without earlier-move controls.
## A row of DATA is kept only when every condition below is TRUE (a row whose
## combined condition is NA is dropped):
##   * mover4 == 1
##   * sameres12to14 is NA (original note: removes spouses)
##   * locality code and registrant id agree between the cali312 and cali314
##     columns
##   * zip_cali312 differs from zip_cali314 and both convert to numbers
##   * party_cali312 and party_cali314 are each "dem" or "rep"
SUB <- local({
    same_registrant <- with(
        DATA,
        localitycode_cali312 == localitycode_cali314
        & registrantid_cali312 == registrantid_cali314
    )
    usable_zips <- with(DATA, zip_cali312 != zip_cali314) &
        !is.na(as.numeric(DATA$zip_cali312)) &
        !is.na(as.numeric(DATA$zip_cali314))
    two_party <- with(
        DATA,
        party_cali312 %in% c("dem", "rep") & party_cali314 %in% c("dem", "rep")
    )
    keep <- with(DATA, mover4 == 1 & is.na(sameres12to14)) &
        same_registrant & usable_zips & two_party

    DATA[which(keep), , drop = FALSE]
})

## Linear model (lm) with a logical response: TRUE when party_cali312 equals
## party_cali314.  The only predictor is move12to14.distance binned by cut()
## on the.brks (left-closed bins).
## Fitted on the rows of SUB where both party columns are "dem"/"rep".
## The code is left exactly as written: the formula text becomes the
## coefficient names, and mod14$call is stored and saved further down.
## NOTE(review): with right=FALSE and no include.lowest, a distance equal to
## the largest break gets an NA bin from cut(); such rows would then be
## dropped by lm()'s NA handling -- confirm this is intended.
mod14 <- lm(
    party_cali312==party_cali314 ~
    cut(move12to14.distance, the.brks,right=FALSE)
   ,
    data = subset(
        SUB,
        party_cali312 %in% c("dem","rep")
        & party_cali314 %in% c("dem","rep")
        )
    )


## Pieces of the fit that are saved: coefficients, their standard errors,
## confidence intervals and the call that produced the model.
the.coefs <- coef(mod14)
the.ses <- coefficients(summary(mod14))[, "Std. Error"]
the.cis <- confint(mod14)
the.call <- mod14$call

## Rows lm() dropped because of missing values (NULL when nothing was dropped).
the.nas <- mod14$na.action

## Rows of SUB that were not dropped for missingness.  The names of
## mod14$na.action are row names, so they have to be matched against
## row.names(SUB): a mask built from row.names(DATA) has a different length
## and row order than SUB and would select the wrong rows.
kept.rows <- !(row.names(SUB) %in% names(the.nas))

## Median move12to14.distance within each distance bin.
the.medians <- with(
    SUB[kept.rows, , drop = FALSE],
    tapply(
        X = move12to14.distance,
        INDEX = cut(move12to14.distance, the.brks, right = FALSE),
        median
    )
)

## Mean of log(distance + 1) within each distance bin (log scale, not the
## raw mean).
the.means <- with(
    SUB[kept.rows, , drop = FALSE],
    tapply(
        X = move12to14.distance,
        INDEX = cut(move12to14.distance, the.brks, right = FALSE),
        function(x) mean(log(x + 1), na.rm = TRUE)
    )
)

## One prediction (with standard error) per distance bin.
## The model only sees distance through cut(), so any value inside a bin
## gives that bin's prediction.  The bin midpoint (lower edge + half the bin
## width) is used as the probe value; a fixed +0.01 offset would fall into
## the next bin whenever a bin is narrower than 0.01.
the.preds <- predict(
    mod14,
    newdata = data.frame(
        move12to14.distance = the.brks[-length(the.brks)] + diff(the.brks) / 2
    ),
    se.fit = TRUE
)

## Write the extracted results for this specification to disk.
save(the.coefs, the.ses, the.preds, the.cis, the.call, the.medians, the.means, the.nas, file="moves_distance_12to14_simple_noagecontrol_check.RData")
